Load packages

library(tidyverse)
library(magrittr)
library(ggplot2)
library(pheatmap)
library(ggrepel)
library(viridisLite)
library(DT)
library(cowplot)
library(here)

source(here("R/plot_expression.R"))
source(here("R/plot_proportions.R"))
source(here("R/plot_structures.R"))
source(here("R/get_profile_plot.R"))

oxygenGenes <- read_csv(here("data/results/oxygenGenes.csv.gz"))
oxygenTranscripts <- read_csv(here("data/results/oxygenTranscripts.csv.gz"))

dtelist <- read_rds(here("data/counts/dtelist.rds"))
dgelist <- read_rds(here("data/counts/dgelist.rds"))

dgelist$samples <- dgelist$samples %>%
  mutate(GestationGroup = Timepoint %>% str_remove("weeks"),
         GestationGroup = factor(GestationGroup, levels = c("6-10", "11-23")))

dtelist$samples <- dtelist$samples %>%
  mutate(GestationGroup = Timepoint %>% str_remove("weeks"),
         GestationGroup = factor(GestationGroup, levels = c("6-10", "11-23")))

Figure S1 - Transcript Expression Volcano Plot

up <- oxygenTranscripts %>%
  dplyr::filter(FDR < 0.05 & logFC >= 1)

down <- oxygenTranscripts %>%
  dplyr::filter(FDR < 0.05 & logFC <= -1)

highlight1 <- oxygenTranscripts %>% 
  dplyr::filter(FDR < 0.01 & logFC > 3) %>%
  arrange(desc(logFC)) %>%
  mutate(direction = "up") %>%
  head(5)

highlight2 <- oxygenTranscripts %>% 
  dplyr::filter(FDR < 0.01 & logFC < -3) %>%
  arrange(desc(abs(logFC))) %>%
  mutate(direction = "down") %>%
  head(5)

highlight <- rbind(highlight1,
                   highlight2) %>%
  distinct(GeneName, .keep_all = TRUE)

volcano <- oxygenTranscripts %>%
  ggplot() +
  geom_point(
    colour = "grey",
    aes(x = logFC,
        y = -log10(FDR))
  ) +
  geom_point(
    data = up,
    colour = "red",
    aes(x = logFC,
        y = -log10(FDR))
  ) +
  geom_point(
    data = down,
    colour = "blue",
    aes(x = logFC,
        y = -log10(FDR))
  ) +
  labs(x = "Log-fold change (logFC)",
       y = "Significance (-log10 FDR)") +
  ggtitle(
    "Differential Transcript Expression: 6-10 weeks' vs 11-23 weeks' gestation"
    ) +
  theme_bw() +
  theme(axis.title = element_text(size = 12,
                                  colour = "black"),
        axis.text = element_text(size = 10,
                                 colour = "black"),
        plot.title = element_text(size = 14,
                                  colour = "black",
                                  face = "bold"))

volcano +
  geom_label_repel(data = highlight,
                   aes(x = logFC,
                       y = -log10(FDR),
                       label = TranscriptName,
                       colour = direction),
                   show.legend = FALSE) +
  scale_colour_manual(values = c("blue", "red"))

Figure S2 - FLT, PGF, IGF2

flt_exp <- plot_txp(
  dgelist = dtelist,
  target_gene = "FLT1",
  variable = "GestationGroup",
  level = "transcript",
  tpm_method = "standard",
  path = NULL,
  keep_legend = FALSE
) +
  labs(x = "Gestation Group (weeks)")

flt_proportions <- plot_proportions(
  dtelist = dtelist,
  target_gene = "FLT1",
  variable = "GestationGroup",
  tpm_method = "tximport",
  path = here("data/counts/salmon_abundance.csv.gz"),
  keep_legend = FALSE
) +
  labs(x = "Gestation Group (weeks)")

flt_legend <- cowplot::get_legend(plot_proportions(
  dtelist = dtelist,
  target_gene = "FLT1",
  variable = "GestationGroup",
  tpm_method = "tximport",
  path = here("data/counts/salmon_abundance.csv.gz"),
  keep_legend = TRUE
) +
  xlab("Gestation Group (weeks)") +
  labs(fill = "Transcript"))

flt_cowplot <- cowplot::plot_grid(flt_exp,
                                  flt_proportions,
                                  flt_legend,
                                  rel_widths = c(1, 1, 0.4),
                                  labels = c("A", "B", " "),
                                  ncol = 3)
pgf_exp <- plot_txp(
  dgelist = dtelist,
  target_gene = "PGF",
  variable = "GestationGroup",
  level = "transcript",
  tpm_method = "standard",
  path = NULL,
  keep_legend = FALSE
) +
  labs(x = "Gestation Group (weeks)")

pgf_proportions <- plot_proportions(
  dtelist = dtelist,
  target_gene = "PGF",
  variable = "GestationGroup",
  tpm_method = "tximport",
  path = here("data/counts/salmon_abundance.csv.gz"),
  keep_legend = FALSE
) +
  labs(x = "Gestation Group (weeks)")

pgf_legend <- cowplot::get_legend(
  plot_proportions(
    dtelist = dtelist,
    target_gene = "PGF",
    variable = "GestationGroup",
    tpm_method = "tximport",
    path = here("data/counts/salmon_abundance.csv.gz"),
    keep_legend = TRUE
  ) +
    xlab("Gestation Group (weeks)") +
    labs(fill = "Transcript"))

pgf_cowplot <- cowplot::plot_grid(pgf_exp,
                                  pgf_proportions,
                                  pgf_legend,
                                  rel_widths = c(1, 1, 0.4),
                                  labels = c("C", "D", " "),
                                  ncol = 3)
igf_exp <- plot_txp(
  dgelist = dtelist,
  target_gene = "IGF2",
  variable = "GestationGroup",
  level = "transcript",
  tpm_method = "standard",
  path = NULL,
  keep_legend = FALSE
) +
  labs(x = "Gestation Group (weeks)")

igf_proportions <- plot_proportions(
  dtelist = dtelist,
  target_gene = "IGF2",
  variable = "GestationGroup",
  tpm_method = "tximport",
  path = here("data/counts/salmon_abundance.csv.gz"),
  keep_legend = FALSE
) +
  labs(x = "Gestation Group (weeks)")

igf_legend <- cowplot::get_legend(
  plot_proportions(
    dtelist = dtelist,
    target_gene = "IGF2",
    variable = "GestationGroup",
    tpm_method = "tximport",
    path = here("data/counts/salmon_abundance.csv.gz"),
    keep_legend = TRUE
  ) +
    xlab("Gestation Group (weeks)") +
    labs(fill = "Transcript"))

igf_cowplot <- cowplot::plot_grid(igf_exp,
                                  igf_proportions,
                                  igf_legend,
                                  rel_widths = c(1, 1, 0.4),
                                  labels = c("E", "F", " "),
                                  ncol = 3)
cowplot::plot_grid(
  flt_cowplot,
  pgf_cowplot,
  igf_cowplot,
  nrow = 3
)

Figure S3 - Transcript proportions

cd36_prop <- plot_proportions(
  dtelist = dtelist,
  target_gene = "CD36",
  variable = "GestationGroup",
  tpm_method = "tximport",
  path = here("data/counts/salmon_abundance.csv.gz"),
  keep_legend = TRUE
) +
  labs(x = "Gestation Group (weeks)",
       fill = "Transcript")
nrp2_prop <- plot_proportions(
  dtelist = dtelist,
  target_gene = "NRP2",
  variable = "GestationGroup",
  tpm_method = "tximport",
  path = here("data/counts/salmon_abundance.csv.gz"),
  keep_legend = TRUE
) +
  labs(x = "Gestation Group (weeks)",
       fill = "Transcript")
cowplot::plot_grid(cd36_prop,
                   nrp2_prop,
                   nrow = 2,
                   labels = c("A", "B"))

Figure S4 - Transcript Expression

cd36_exp <- plot_txp(
  dgelist = dtelist,
  target_gene = "CD36",
  variable = "GestationalAge",
  level = "transcript",
  tpm_method = "standard",
  path = NULL,
  keep_legend = TRUE
) +
  labs(x = "Gestational Age (weeks)",
       fill = "Transcript",
       colour = "Transcript")
nucb2_exp <- plot_txp(
  dgelist = dtelist,
  target_gene = "NUCB2",
  variable = "GestationalAge",
  level = "transcript",
  tpm_method = "standard",
  path = NULL,
  keep_legend = TRUE
) +
  labs(x = "Gestational Age (weeks)",
       colour = "Transcript",
       fill = "Transcript")
cowplot::plot_grid(cd36_exp,
                   nucb2_exp,
                   nrow = 2,
                   labels = c("A", "B"))

Figure S5 - Gene Heatmap

scale_rows <- function(x){
  m = apply(x, 1, mean, na.rm = T)
  s = apply(x, 1, sd, na.rm = T)
  return((x - m) / s)
}

gene_tx_diff <- read_csv(here("data/results/figure3_transcripts.csv"))

clusterOrder <- dgelist %>%
  cpm() %>%
  as.data.frame() %>%
  rownames_to_column(var = "Gene") %>%
  dplyr::filter(Gene %in% gene_tx_diff$Gene) %>%
  dplyr::arrange(Gene) %>%
  set_rownames(gene_tx_diff %>%
                 dplyr::filter(!Gene == "ENST00000372728") %>%
                 dplyr::arrange(Gene) %>%
                 dplyr::select(Gene) %>%
                 as.matrix() %>%
                 as.character()) %>%
  dplyr::select(-Gene) %>%
  as.matrix() %>%
  dist() %>%
  hclust() %>%
  as.dendrogram(method = "ward.D2") %>%
  order.dendrogram()

scaled_cpm <- dgelist %>%
  cpm(log = TRUE) %>%
  as.data.frame() %>%
  rownames_to_column(var = "Gene") %>%
  dplyr::filter(Gene %in% gene_tx_diff$Gene) %>%
  dplyr::arrange(Gene) %>%
  set_rownames(gene_tx_diff %>%
                 dplyr::filter(!Gene == "ENST00000372728") %>%
                 dplyr::arrange(Gene) %>%
                 dplyr::select(Gene) %>%
                 as.matrix() %>%
                 as.character()) %>%
  dplyr::select(-Gene) %>%
  as.matrix()
clusteredMatrix <- scaled_cpm[clusterOrder, ]
clusteredMatrix <- clusteredMatrix[, (dgelist$samples %>% 
                                        dplyr::arrange(GestationalAge) %>%
                                        dplyr::filter(
                                          ID %in% colnames(dgelist)
                                        ) %>%
                                        dplyr::arrange(GestationalAge) %>%
                                        dplyr::select(ID) %>%
                                        as.matrix() %>%
                                        as.character)]

gene_row_order <- c("GDPD5", "MGAT1", "ITSN1", "AZIN1", 
                "ADAM10", "HBA2", "SLC16A3", "PSG9",
                "CALM1", "MTUS1", "VMP1", "RPS24",
                "PSG5", "FLT1", "TFPI2")

gene_order <- oxygenGenes %>%
  dplyr::select("Gene",
                "GeneName") %>%
  dplyr::filter(GeneName %in% gene_row_order) %>%
  distinct() %>%
  column_to_rownames("GeneName")

clusteredMatrix <- clusteredMatrix %>%
  as.data.frame() %>%
  rownames_to_column(var = "Gene") %>%
  left_join(oxygenGenes %>%
              dplyr::select("Gene",
                            "GeneName"),
            by = "Gene") %>%
  dplyr::select(-Gene) %>%
  column_to_rownames("GeneName") %>%
  as.matrix()

ann_col <- dgelist$samples %>%
  dplyr::select(ID, "Gestation Group" = Timepoint) %>%
  as.data.frame() %>%
  set_rownames(dgelist$samples$ID) %>% 
  dplyr::select("Gestation Group")

dgelist$samples %>%
  dplyr::arrange(GestationalAge) %>%
  datatable()
pheatmap(clusteredMatrix[gene_row_order, ],
         color = plasma(84),
         border_color = NA,
         show_colnames = FALSE,
         cluster_cols = FALSE,
         cluster_rows = FALSE,
         gaps_col = 27,
         gaps_row = c(3,8,13),
         annotation_col = ann_col)

pheatmap(clusteredMatrix,
         color = plasma(84),
         border_color = NA,
         show_colnames = FALSE,
         cluster_cols = FALSE,
         cutree_rows = 4,
         gaps_col = 27,
         annotation_col = ann_col)

Figure S6 - FN1 and PSG9

fn1_gene <- plot_txp(
  dgelist = dgelist,
  target_gene = "FN1",
  variable = "GestationalAge",
  level = "gene",
  tpm_method = "standard",
  path = NULL,
  keep_legend = FALSE
) +
  labs(x = "Gestational Age (weeks)")

fn1_tx <- plot_txp(
  dgelist = dtelist,
  target_gene = "FN1",
  variable = "GestationalAge",
  level = "transcript",
  tpm_method = "tximport",
  path = here("data/counts/salmon_abundance.csv.gz"),
  keep_legend = FALSE
) +
  labs(x = "Gestational Age (weeks)")

fn1_legend <- cowplot::get_legend(
  plot_txp(
    dgelist = dtelist,
    target_gene = "FN1",
    variable = "GestationalAge",
    level = "transcript",
    tpm_method = "tximport",
    path = here("data/counts/salmon_abundance.csv.gz"),
    keep_legend = TRUE
  ) +
    labs(x = "Gestational Age (weeks)")
)
## Warning: Removed 6 rows containing non-finite values (stat_smooth).
top_row <- cowplot::plot_grid(fn1_gene,
                              fn1_tx,
                              fn1_legend,
                              nrow = 1,
                              rel_widths = c(1, 1, 0.4),
                              labels = c("A", "B", ""))
## Warning: Removed 6 rows containing non-finite values (stat_smooth).
psg9_gene <- plot_txp(
  dgelist = dgelist,
  target_gene = "PSG9",
  variable = "GestationalAge",
  level = "gene",
  tpm_method = "standard",
  path = NULL,
  keep_legend = FALSE
) +
  labs(x = "Gestational Age (weeks)")

psg9_tx <- plot_txp(
  dgelist = dtelist,
  target_gene = "PSG9",
  variable = "GestationalAge",
  level = "transcript",
  tpm_method = "tximport",
  path = here("data/counts/salmon_abundance.csv.gz"),
  keep_legend = FALSE
) +
  labs(x = "Gestational Age (weeks)")

psg9_legend <- cowplot::get_legend(
  plot_txp(
    dgelist = dtelist,
    target_gene = "PSG9",
    variable = "GestationalAge",
    level = "transcript",
    tpm_method = "tximport",
    path = here("data/counts/salmon_abundance.csv.gz"),
    keep_legend = TRUE
  ) +
    labs(x = "Gestational Age (weeks)")
)
## Warning: Removed 6 rows containing non-finite values (stat_smooth).
bottom_row <- cowplot::plot_grid(psg9_gene,
                                 psg9_tx,
                                 psg9_legend,
                                 nrow = 1,
                                 rel_widths = c(1, 1, 0.4),
                                 labels = c("C", "D", ""))
## Warning: Removed 6 rows containing non-finite values (stat_smooth).
cowplot::plot_grid(top_row,
                   bottom_row,
                   ncol = 1)

Figure S7 - VMP1 profile

txprofiler(dgelist = dgelist,
           dtelist = dtelist,
           target_gene = "VMP1",
           variable = "GestationalAge",
           transcript_variable = "GestationalAge",
           dtu_variable = "GestationGroup",
           abundance_path = here("data/counts/salmon_abundance.csv.gz"),
           tpm_method = "tximport",
           gene_x_label = "Gestational Age (weeks)",
           transcript_x_label = "Gestational Age (weeks)",
           dtu_x_label = "Gestation Group (weeks)")
## Warning: `select_()` was deprecated in dplyr 0.7.0.
## Please use `select()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.

Figure S8 - ASAH1 profile

txprofiler(dgelist = dgelist,
           dtelist = dtelist,
           target_gene = "ASAH1",
           variable = "GestationalAge",
           transcript_variable = "GestationalAge",
           dtu_variable = "GestationGroup",
           abundance_path = here("data/counts/salmon_abundance.csv.gz"),
           tpm_method = "tximport",
           gene_x_label = "Gestational Age (weeks)",
           transcript_x_label = "Gestational Age (weeks)",
           dtu_x_label = "Gestation Group (weeks)")

Figure S9 - GPR126 profile

txprofiler(dgelist = dgelist,
           dtelist = dtelist,
           target_gene = "GPR126",
           variable = "GestationalAge",
           transcript_variable = "GestationalAge",
           dtu_variable = "GestationGroup",
           abundance_path = here("data/counts/salmon_abundance.csv.gz"),
           tpm_method = "tximport",
           gene_x_label = "Gestational Age (weeks)",
           transcript_x_label = "Gestational Age (weeks)",
           dtu_x_label = "Gestation Group (weeks)")
## Warning: Removed 1 rows containing non-finite values (stat_smooth).

Figure S10 - TFPI2 profile

txprofiler(dgelist = dgelist,
           dtelist = dtelist,
           target_gene = "TFPI2",
           variable = "GestationalAge",
           transcript_variable = "GestationalAge",
           dtu_variable = "GestationGroup",
           abundance_path = here("data/counts/salmon_abundance.csv.gz"),
           tpm_method = "tximport",
           gene_x_label = "Gestational Age (weeks)",
           transcript_x_label = "Gestational Age (weeks)",
           dtu_x_label = "Gestation Group (weeks)")

Figure S11 - MTUS1, PTPRJ, RPS24, PSG6

mtus1_exp <- plot_txp(
  dgelist = dtelist,
  target_gene = "MTUS1",
  variable = "GestationalAge",
  level = "transcript",
  tpm_method = "standard",
  path = NULL,
  keep_legend = FALSE
) +
  labs(x = "Gestational Age (weeks)")

mtus1_prop <- plot_proportions(
  dtelist = dtelist,
  target_gene = "MTUS1",
  variable = "GestationGroup",
  tpm_method = "tximport",
  path = here("data/counts/salmon_abundance.csv.gz"),
  keep_legend = FALSE
) +
  labs(x = "Gestation Group (weeks)",
       fill = "Transcript")

mtus1_legend <- cowplot::get_legend(
  plot_txp(
    dgelist = dtelist,
    target_gene = "MTUS1",
    variable = "GestationalAge",
    level = "transcript",
    tpm_method = "standard",
    path = NULL,
    keep_legend = TRUE
  ) +
    labs(x = "Gestational Age (weeks)",
         fill = "Transcript",
         colour = "Transcript")
)

first_row <- cowplot::plot_grid(
  mtus1_exp,
  mtus1_prop,
  mtus1_legend,
  labels = c("A", "B", ""),
  nrow = 1,
  rel_widths = c(1, 1, 0.2)
)

ptprj_exp <- plot_txp(
  dgelist = dtelist,
  target_gene = "PTPRJ",
  variable = "GestationalAge",
  level = "transcript",
  tpm_method = "standard",
  path = NULL,
  keep_legend = FALSE
) +
  labs(x = "Gestational Age (weeks)")

ptprj_prop <- plot_proportions(
  dtelist = dtelist,
  target_gene = "PTPRJ",
  variable = "GestationGroup",
  tpm_method = "tximport",
  path = here("data/counts/salmon_abundance.csv.gz"),
  keep_legend = FALSE
) +
  labs(x = "Gestation Group (weeks)",
       fill = "Transcript")

ptprj_legend <- cowplot::get_legend(
  plot_txp(
    dgelist = dtelist,
    target_gene = "PTPRJ",
    variable = "GestationalAge",
    level = "transcript",
    tpm_method = "standard",
    path = NULL,
    keep_legend = TRUE
  ) +
    labs(x = "Gestational Age (weeks)",
         fill = "Transcript",
         colour = "Transcript")
)

second_row <- cowplot::plot_grid(
  ptprj_exp,
  ptprj_prop,
  ptprj_legend,
  labels = c("C", "D", ""),
  nrow = 1,
  rel_widths = c(1, 1, 0.2)
)


rps24_exp <- plot_txp(
  dgelist = dtelist,
  target_gene = "RPS24",
  variable = "GestationalAge",
  level = "transcript",
  tpm_method = "standard",
  path = NULL,
  keep_legend = FALSE
) +
  labs(x = "Gestational Age (weeks)")

rps24_prop <- plot_proportions(
  dtelist = dtelist,
  target_gene = "RPS24",
  variable = "GestationGroup",
  tpm_method = "tximport",
  path = here("data/counts/salmon_abundance.csv.gz"),
  keep_legend = FALSE
) +
  labs(x = "Gestation Group (weeks)",
       fill = "Transcript")

rps24_legend <- cowplot::get_legend(
  plot_txp(
    dgelist = dtelist,
    target_gene = "RPS24",
    variable = "GestationalAge",
    level = "transcript",
    tpm_method = "standard",
    path = NULL,
    keep_legend = TRUE
  ) +
    labs(x = "Gestational Age (weeks)",
         fill = "Transcript",
         colour = "Transcript")
)

third_row <- cowplot::plot_grid(
  rps24_exp,
  rps24_prop,
  rps24_legend,
  labels = c("E", "F", ""),
  nrow = 1,
  rel_widths = c(1, 1, 0.2)
)


psg6_exp <- plot_txp(
  dgelist = dtelist,
  target_gene = "PSG6",
  variable = "GestationalAge",
  level = "transcript",
  tpm_method = "standard",
  path = NULL,
  keep_legend = FALSE
) +
  labs(x = "Gestational Age (weeks)")

psg6_prop <- plot_proportions(
  dtelist = dtelist,
  target_gene = "PSG6",
  variable = "GestationGroup",
  tpm_method = "tximport",
  path = here("data/counts/salmon_abundance.csv.gz"),
  keep_legend = FALSE
) +
  labs(x = "Gestation Group (weeks)",
       fill = "Transcript") +
  theme(axis.text.x = element_text(angle = 45, vjust = 0.6))

psg6_legend <- cowplot::get_legend(
  plot_txp(
    dgelist = dtelist,
    target_gene = "PSG6",
    variable = "GestationalAge",
    level = "transcript",
    tpm_method = "standard",
    path = NULL,
    keep_legend = TRUE
  ) +
    labs(x = "Gestational Age (weeks)",
         fill = "Transcript",
         colour = "Transcript")
)

fourth_row <- cowplot::plot_grid(
  psg6_exp,
  psg6_prop,
  psg6_legend,
  labels = c("G", "H", ""),
  nrow = 1,
  rel_widths = c(1, 1, 0.2)
)

cowplot::plot_grid(
  first_row,
  second_row,
  third_row,
  fourth_row,
  ncol = 1
)